################################################################################
## Group Identities and Parliamentary Debates: Replication package
## Fiva, Nedregård and Øien (2025)

# Description:

## Code to make Figure 4: "Political Divergence by speech length"

################################################################################

####################################

#--------- Packages----------#

library(data.table)

##################################

####################################

#--------- DATA DIR----------#

##################################

# Folders used by this script (relative paths, resolved against the working
# directory).
in_text    <- "../results/in_text"      # small text files read and written below
figure.dir <- "../results/figures"      # destination of fig4.pdf
data.dir   <- "../data/3_model_output"  # magnitude_list_<characteristic>.RDS inputs


####################################

#--------- GLOBAL MACROS----------#

##################################


# Number of words shown on the x-axis. The plotting code draws each curve
# against 0:n, so this must equal the number of rows in the model output.
n <- 200

yMax <- 0.9 # range(polars1)[2]
yLim <- c(.5, 1)
yLabel <- "Expected posterior"

# Axis ticks and background grid are derived from n so they cannot drift apart.
x_axis <- seq(0, n, 20)
v_lines <- seq(0, n, 10)
h_lines <- seq(0.5, 1, .1)

# Median words per minute, written by the code that makes Appendix Table A3.
# The value is used both as a row index and as a plot coordinate, so it is
# validated here: a blank, multi-line or non-numeric file would otherwise
# propagate silently as NA or as a vector.
median.words <- local({
  raw <- trimws(readLines(paste(in_text, "median_words_per_minute.txt", sep = "/")))
  value <- as.numeric(raw[nzchar(raw)])
  if (length(value) != 1L || !is.finite(value) || value <= 0) {
    stop("median_words_per_minute.txt must contain exactly one positive number",
         call. = FALSE)
  }
  value
})

####################################

#---------GATHERING DATA----------#

####################################

# Read the model output for one characteristic and split its sessions in two.
#
# Arguments:
#   characteristic  suffix of the input file, i.e. the file read is
#                   <dir>/magnitude_list_<characteristic>.RDS
#   dir             folder holding the .RDS files (defaults to data.dir)
#
# Returns an unnamed list of two matrices: [[1]] holds the first half of the
# columns after sorting them by name (the "old" sessions), [[2]] the remaining
# columns (the "new" sessions). With an odd number of columns the extra one
# goes to the second matrix.
read_session_halves <- function(characteristic, dir = data.dir) {
  magnitude.data <- readRDS(
    file = file.path(dir, paste0("magnitude_list_", characteristic, ".RDS"))
  )
  pi_avg_matrix <- do.call(cbind, magnitude.data)

  session.names <- colnames(pi_avg_matrix)
  if (is.null(session.names)) {
    stop("magnitude_list_", characteristic,
         ".RDS has no column names to order the sessions by", call. = FALSE)
  }

  # drop = FALSE keeps a matrix even when a half has a single column; without
  # it the half collapses to a vector and loses its column names.
  pi_avg_matrix <- pi_avg_matrix[, sort(session.names), drop = FALSE]

  # Split by position rather than by matching names, so the two halves are
  # always disjoint and together cover every column.
  is.old <- seq_len(ncol(pi_avg_matrix)) <= floor(ncol(pi_avg_matrix) / 2)

  list(pi_avg_matrix[, is.old, drop = FALSE],
       pi_avg_matrix[, !is.old, drop = FALSE])
}

charac <- c("gender", "age", "town", "social_background")
labels <- c("Gender", "Age", "Urbanicity", "Father's occupation")

## Split in old and new sessions, for bloc and for each characteristic
magnitude.data.list <- lapply(c("bloc", charac), read_session_halves)
names(magnitude.data.list) <- c("bloc", charac)

###################################################

#--MAKING A TABLE FOR CORRECTLY GUESSING CHARACTERISTIC AFTER 1 AND 3 MIN------#

####################################################

# Percentage (rounded to a whole number) read off the session-averaged curve
# after one and after three minutes of speech, for the old and the new period.
#
# Arguments:
#   x                 list of two matrices, old sessions first and new sessions
#                     second; rows are averaged across columns, and row k is
#                     taken as the value after k words
#   words.per.minute  words spoken in one minute; defaults to the global
#                     median.words so existing one-argument calls still work
#
# Returns a one-row data.frame with columns p.one.minute.speech.old,
# p.three.minute.speech.old, p.one.minute.speech.new, p.three.minute.speech.new.
#
# Stops with an error when the one- or three-minute position falls outside the
# rows of a matrix, instead of silently returning NA.
p.per_one_and_three.minute.of.speech <- function(x, words.per.minute = median.words) {

  if (length(words.per.minute) != 1L || !is.finite(words.per.minute)) {
    stop("words.per.minute must be a single finite number", call. = FALSE)
  }

  # The three-minute position is three times the *rounded* one-minute position.
  one.minute <- round(words.per.minute)
  three.minutes <- one.minute * 3

  percent.after <- function(d.matrix, n.words) {
    average <- rowMeans(d.matrix)
    if (n.words < 1 || n.words > length(average)) {
      stop("a speech of ", n.words, " words is outside the ", length(average),
           " rows available", call. = FALSE)
    }
    round(average[n.words] * 100)
  }

  d.matrix.old <- x[[1]]
  d.matrix.new <- x[[2]]

  data.frame(
    "p.one.minute.speech.old"   = percent.after(d.matrix.old, one.minute),
    "p.three.minute.speech.old" = percent.after(d.matrix.old, three.minutes),
    "p.one.minute.speech.new"   = percent.after(d.matrix.new, one.minute),
    "p.three.minute.speech.new" = percent.after(d.matrix.new, three.minutes)
  )
}


# One row of percentages per characteristic, stacked into a single table and
# labelled with the characteristic it belongs to.
output.list <- lapply(magnitude.data.list, p.per_one_and_three.minute.of.speech)
output <- rbindlist(output.list)
output$characteristic <- names(magnitude.data.list)

## Numbers used in the paper
##
## Each number goes into its own small text file in the in_text folder. A
## single table referenced from the .tex source would be tidier, but this
## keeps things simple.

# Bloc, one minute of speech: written as the bare number.
p.one.minute.speech.bloc.old <- output[characteristic == "bloc", p.one.minute.speech.old]
writeLines(text = as.character(p.one.minute.speech.bloc.old),
           con = file.path(in_text, "p.one.minute.speech.bloc.old.txt"))

p.one.minute.speech.bloc.new <- output[characteristic == "bloc", p.one.minute.speech.new]
writeLines(text = as.character(p.one.minute.speech.bloc.new),
           con = file.path(in_text, "p.one.minute.speech.bloc.new.txt"))

# Bloc, three minutes of speech: written wrapped in parentheses.
p.three.minute.speech.bloc.old <- output[characteristic == "bloc", p.three.minute.speech.old]
writeLines(text = paste0("(", p.three.minute.speech.bloc.old, ")"),
           con = file.path(in_text, "p.three.minute.speech.bloc.old.txt"))

p.three.minute.speech.bloc.new <- output[characteristic == "bloc", p.three.minute.speech.new]
writeLines(text = paste0("(", p.three.minute.speech.bloc.new, ")"),
           con = file.path(in_text, "p.three.minute.speech.bloc.new.txt"))

# Smallest and largest one-minute value over every characteristic other than
# bloc, pooling the old and the new period.
min.one.minute.not.bloc <- output[characteristic != "bloc",
                                  min(c(p.one.minute.speech.old, p.one.minute.speech.new))]
writeLines(text = as.character(min.one.minute.not.bloc),
           con = file.path(in_text, "min.one.minute.not.bloc.txt"))

max.one.minute.not.bloc <- output[characteristic != "bloc",
                                  max(c(p.one.minute.speech.old, p.one.minute.speech.new))]
writeLines(text = as.character(max.one.minute.not.bloc),
           con = file.path(in_text, "max.one.minute.not.bloc.txt"))



####################################

#------ FUNCTION FIGURE 4 IN PAPER ---------###

####################################

# magnitude.plot.function

# Draw one panel: the session-averaged curve against the number of words, for
# the old sessions (grey) and the new sessions (black).
#
# Arguments:
#   d.matrix.old, d.matrix.new  matrices with one row per number of words and
#                               one column per session
#   main.title      panel title
#   n.points        spacing, in words, between the markers drawn on the average
#                   curves; 0 (or less) draws no markers
#   median.words    words per minute; positions the one-minute line at this
#                   value and the three-minute line at three times this value
#   three.min.mark  draw the "Three minutes of speech" line and label?
#   one.min.mark    draw the "One minute of speech" line and label?
#   ind.sessions    also draw every individual session as a grey line?
#   legend          draw a legend in the top-left corner of the panel?
#   pch.new, pch.old  marker symbols for the new / old average curve
#
# Reads the globals x_axis, yLim, yLabel, v_lines and h_lines for the axis
# layout and background grid. Every curve starts at (0, 0.5) and its length is
# taken from the data, not from a global.
#
# Called for its side effect of drawing on the current graphics device; the
# value is whatever the final axis() call returns.
func.magn.plot.ind <- function(d.matrix.old, d.matrix.new, main.title = "",
                               n.points = 10, median.words = 50, three.min.mark = TRUE,
                               one.min.mark = TRUE, ind.sessions = TRUE, legend = TRUE,
                               pch.new = 1, pch.old = 2){

    col.old <- "#99999980"
    col.new <- "black"

    average.old <- rowMeans(d.matrix.old)
    average.new <- rowMeans(d.matrix.new)

    # Prepend the starting value at zero words: element k + 1 of a curve is
    # the value after k words.
    curve.old <- c(.5, average.old)
    curve.new <- c(.5, average.new)
    words.old <- seq_along(curve.old) - 1
    words.new <- seq_along(curve.new) - 1

    plot(x_axis, rep(0, length(x_axis)), type = "n", ylim = yLim,
         xlab = "", xaxt = 'n', yaxt = 'n', ylab = yLabel, frame.plot = TRUE, bty = "n",
         main = main.title, cex.lab = 1.2)

    # Background grid
    abline(v = v_lines, col = "grey90", lwd = 0.6, lty = 2)
    abline(h = h_lines, col = "grey90", lwd = 0.6, lty = 2)

    title(xlab = "Number of words", line = 2)

    if (isTRUE(one.min.mark)){
        abline(v = median.words, col = "black")
        text(x = median.words - 17, y = 0.9, labels = "One minute \n of speech",
             col = "black", cex = .9)
    }

    if (isTRUE(three.min.mark)){
        text(x = 3*median.words - 21, y = 0.9, labels = "Three minutes \n of speech",
             col = "black", cex = .9)
        abline(v = 3*median.words, col = "black")
    }

    lines(words.old, curve.old, col = col.old, lty = 1, lwd = 1.5)
    lines(words.new, curve.new, col = col.new, lty = 1, lwd = 1.5)

    if (n.points > 0){
        # The marker at k words must use element k + 1 of the curve so that it
        # sits exactly on the line drawn above.
        marks.old <- seq(0, length(average.old), n.points)
        marks.new <- seq(0, length(average.new), n.points)
        points(marks.old, curve.old[marks.old + 1], pch = pch.old, col = col.old)
        points(marks.new, curve.new[marks.new + 1], pch = pch.new, col = col.new)
    }

    if (isTRUE(ind.sessions)){
        # Individual sessions of both periods share the same grey.
        for (j in seq_len(ncol(d.matrix.old))){
            lines(words.old, c(.5, d.matrix.old[, j]), col = col.old, lty = 1)
        }
        for (j in seq_len(ncol(d.matrix.new))){
            lines(words.new, c(.5, d.matrix.new[, j]), col = col.old, lty = 1)
        }
    }

    if (isTRUE(legend)){
        # Symbols and colours come from the same values used for drawing, so
        # the key always matches the curves. Period labels mirror the separate
        # legend panel of the figure.
        key.labels <- c("Average sessions 1982 - 2001",
                        "Average sessions 2002 - 2021")
        key.pch <- c(pch.old, pch.new)
        key.col <- c(col.old, col.new)
        if (isTRUE(ind.sessions)){
            key.labels <- c(key.labels, "Individual sessions")
            key.pch <- c(key.pch, NA)
            key.col <- c(key.col, col.old)
        }
        legend("topleft", bty = "n", lwd = 2, ncol = 1,
               legend = key.labels,
               lty = rep(1, length(key.labels)),
               pch = key.pch,
               col = key.col)
    }

    axis(1, at = x_axis, lwd = 0.4)

    axis(2, lwd = 0.4)
}

####################################

#------ FIGURE IN PAPER ---------###

####################################

# Figure 4: a 3 x 2 grid with one panel per characteristic, plus a sixth,
# empty panel that only holds the shared legend.
pdf(paste(figure.dir, "fig4.pdf", sep = "/"), width = 9, height = 9,
    pointsize = 12)

# The device is closed in `finally`, so a failure while plotting does not
# leave the pdf device open for whatever runs next in the same session.
invisible(tryCatch({

    par(mfrow = c(3, 2), mar = c(4, 4, 3, 2), las = 1)

    charac <- c("bloc", "gender", "age", "town", "social_background")
    labels <- c("Bloc", "Gender", "Age", "Urbanicity", "Father's occupation")

    # Macros shared by the panels and the legend
    ind.sess <- FALSE
    pch.new <- 20
    pch.old <- 17

    for (i in seq_along(charac)){

        halves <- magnitude.data.list[[charac[i]]]

        # ind.sessions follows ind.sess, so the panels and the legend panel
        # below always agree on whether individual sessions are shown.
        func.magn.plot.ind(d.matrix.old = halves[[1]],
                           d.matrix.new = halves[[2]],
                           one.min.mark = TRUE,
                           three.min.mark = TRUE,
                           legend = FALSE,
                           ind.sessions = ind.sess,
                           main.title = labels[i],
                           pch.new = pch.new,
                           pch.old = pch.old,
                           median.words = median.words)

    }

    #------> LEGEND

    # Empty panel with the same coordinate system, used only to place the key.
    plot(x_axis, rep(0, length(x_axis)), type = "n", ylim = yLim,
         xlab = "", xaxt = 'n', yaxt = 'n', ylab = "", bty = "n")

    key.labels <- c("Average sessions 2002 - 2021",
                    "Average sessions 1982 - 2001")
    key.pch <- c(pch.new, pch.old)
    key.col <- c("black", "#99999980")

    if (isTRUE(ind.sess)){
        key.labels <- c(key.labels, "Individual sessions")
        key.pch <- c(key.pch, NA)
        key.col <- c(key.col, "#99999980")
    }

    legend("center", bty = "n", lwd = 2, ncol = 1,
           legend = key.labels,
           lty = rep(1, length(key.labels)),
           pch = key.pch,
           col = key.col, cex = 1.5)

}, finally = dev.off()))










